Parametri globali

In [1]:
# Global configuration, tuned here so the whole notebook reads them:
# C     — SVM regularization strength passed to SVC below.
# nfold — number of StratifiedKFold cross-validation splits.
C=10000
nfold=10

Import librerie

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
from nltk.tokenize import word_tokenize 
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
import seaborn as sns
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
import unicodedata
from lime import lime_text

Import dataset

In [3]:
# Load the labelled corpus and drop rows missing either the class label
# ("cap_maj_master") or the document text ("testo").
df = pd.read_excel('./politica.xlsx', sheet_name="Foglio1")
print(f'Found {len(df)} texts.')

print(f'{df["cap_maj_master"].isnull().sum()} document(s) with no classification removed')
df = df[df['cap_maj_master'].notnull()]

print(f'{df["testo"].isnull().sum()} document(s) with no text removed')
df = df[df['testo'].notnull()]

# Parallel lists: integer CAP major-topic codes and the raw document texts.
classes = df['cap_maj_master'].astype(int).tolist()
documents = df['testo'].tolist()
Found 5674 texts.
2 document(s) with no classification removed
424 document(s) with no text removed
In [4]:
# Class distribution: bar chart of document counts per CAP topic code.
y = np.bincount(classes)
x = np.arange(len(y))
fig, ax = plt.subplots()
# Use the OO API consistently instead of mixing plt.* state calls with `ax`.
ax.bar(x, y, width=0.7)
ax.set_xticks(x)
ax.set_aspect('auto')
ax.set(xlabel='Class code', ylabel='Number of documents',
       title='Class distribution')
plt.show()

Preprocessing

In [5]:
def preprocessor(text):
    """Normalize raw text for vectorization.

    Strips HTML tags, digit runs and stray glyphs, lower-cases the text,
    collapses every run of non-word characters to a single space, and
    re-appends any emoticons found in the original (with the '-' nose
    removed).

    :param text: raw document (any type; coerced to str).
    :returns: cleaned, lower-cased string.
    """
    # Raw strings for the regexes: '\d' and '\W' in plain strings are
    # invalid escape sequences (DeprecationWarning, an error in future Python).
    text = re.sub(r'<[^>]*>', ' ', str(text))   # drop HTML tags
    text = re.sub(r'\d+', ' ', str(text))       # drop digit runs
    text = re.sub('[ᆱᄏ]', '', str(text))        # drop mis-encoded guillemet artifacts
    emoticons = re.findall(r'(?::|;|=)(?:-)?(?:\)|\(|D|P)',
                           str(text))
    text = (re.sub(r'[\W]+', ' ', text.lower()) + ' ' +
            ' '.join(emoticons).replace('-', ''))
    return text

def strip_accents(text):
    """Return `text` with accents removed via NFD decomposition.

    :param text: The input string.
    :type text: String.

    :returns: The processed String (ASCII-only).
    :rtype: String.
    """
    try:
        # Python 2 compatibility: decode byte strings to unicode.
        # `unicode` does not exist on Python 3, so this is a no-op there.
        text = unicode(text, 'utf-8')
    except (TypeError, NameError):
        pass
    # Split each accented character into base letter + combining mark,
    # then drop everything outside ASCII (i.e. the combining marks).
    decomposed = unicodedata.normalize('NFD', text)
    ascii_only = decomposed.encode('ascii', 'ignore').decode("utf-8")
    return str(ascii_only)

# Italian stop-word set, shared by the tokenizer and the TfidfVectorizer below.
stop=set(stopwords.words('italian'))

# Built once at module level: the original recreated the stemmer on every
# call, which is pure overhead inside TfidfVectorizer's per-document loop.
_italian_stemmer = SnowballStemmer("italian", ignore_stopwords=True)

def tokenizer_porter(text):
    """Tokenize `text` and stem every token with the Italian Snowball stemmer.

    :param text: preprocessed document string.
    :returns: list of stemmed tokens.
    """
    return [_italian_stemmer.stem(word) for word in word_tokenize(text)]
  

def tokenizer(text):
    """Tokenize `text`, dropping Italian stop words and short tokens.

    :param text: preprocessed document string.
    :returns: list of tokens not in the stop list and longer than 3 chars.
    """
    # Reuse the module-level `stop` set: the original rebuilt the whole
    # stop-word set from nltk on every single call.
    word_tokens = word_tokenize(text)
    return [w for w in word_tokens if w not in stop and len(w) > 3]

Create vocabulary

In [6]:
# Document-term matrix: TF-IDF over stemmed tokens, keeping only terms
# occurring in at least 4 documents (min_df).
# NOTE(review): passing both a custom `tokenizer` and `stop_words` means
# sklearn filters stop words against the *stemmed* tokens, so most stop
# words presumably slip through here — confirm this is intended.
tfidf = TfidfVectorizer(strip_accents=strip_accents,
                        lowercase=False,
                        preprocessor=preprocessor,
                        tokenizer=tokenizer_porter,
                        stop_words=stop,
                        min_df = 4
                       )

final_features = tfidf.fit_transform(documents)
In [7]:
# Vocabulary terms and a chi-squared scorer (k="all": score every feature,
# filter by p-value afterwards rather than by rank).
X_names=tfidf.get_feature_names()
x_best_s=SelectKBest(chi2, k="all")
# Keep features with chi2 p-value < 0.05, expressed as score = 1 - p > 0.95.
p_value_limit = 0.95
In [8]:
# Visual sparsity check: heatmap of 100 randomly sampled columns (True = zero cell).
# NOTE(review): np.random is unseeded, so the sampled columns differ on each run.
sns.heatmap(final_features.todense()[:,np.random.randint(0,final_features.shape[1],100)]==0, vmin=0, vmax=1, cbar=False).set_title('Sparse Matrix Sample')
Out[8]:
Text(0.5, 1.0, 'Sparse Matrix Sample')
In [9]:
# Per-class chi-squared feature selection: for every class, score each term
# against a one-vs-rest binary target and keep terms with p-value < 0.05.
# The surviving terms become the reduced vocabulary `X_names`.
dtf_features = pd.DataFrame()
for cat in np.unique(classes):
    # One-vs-rest boolean target. The original called `appoggio.astype(int)`
    # and discarded the result (a no-op); chi2 accepts the mask directly.
    target = np.asarray(classes) == cat
    # fit() is enough — only pvalues_ is read; the transformed matrix
    # the original assigned to `xbest` was never used.
    x_best_s.fit(final_features, target)
    p = x_best_s.pvalues_
    # pd.concat replaces DataFrame.append (deprecated, removed in pandas 2.0).
    dtf_features = pd.concat([dtf_features, pd.DataFrame(
                   {"feature": X_names, "score": 1 - p, "y": cat})])
    dtf_features = dtf_features.sort_values(["y", "score"],
                    ascending=[True, False])
    dtf_features = dtf_features[dtf_features["score"] > p_value_limit]
X_names = dtf_features["feature"].unique().tolist()
In [10]:
# Summarize the chi-squared selection: per class, how many features
# survived and the ten highest-scoring ones.
for cat in np.unique(classes):
    subset = dtf_features[dtf_features["y"] == cat]
    print(f"# {cat}:")
    print(f"  . selected features: {len(subset)}")
    top_terms = ",".join(subset["feature"].values[:10])
    print(f"  . top features: {top_terms}")
    print(" ")
# 1:
  . selected features: 136
  . top features: mezzogiorn,fiat,industrial,inflazion,fiscal,produtt,tern,occupazional,imeres,eni
 
# 2:
  . selected features: 163
  . top features: crocifiss,islam,omosessual,religion,relig,unar,razzial,cattol,confession,cult
 
# 3:
  . selected features: 211
  . top features: farmac,medic,ospedal,ospedalier,pazient,sal,sanitar,vaccin,embrion,malatt
 
# 4:
  . selected features: 182
  . top features: agricol,agricoltor,agricoltur,allev,animal,latt,pesc,produttor,produzion,bovin
 
# 5:
  . selected features: 115
  . top features: lavor,pension,pensionist,infortun,sindacal,inps,forner,previdenzial,voucher,disoccup
 
# 6:
  . selected features: 111
  . top features: abilit,alunn,didatt,docent,format,graduator,insegn,istruzion,paritar,scolast
 
# 7:
  . selected features: 238
  . top features: ambiental,bonif,discar,inquin,rif,smalt,acque,termovalorizz,ecoball,ambient
 
# 8:
  . selected features: 130
  . top features: elettr,energ,energet,gas,gasdott,nucl,petrol,benzin,prezz,baril
 
# 9:
  . selected features: 129
  . top features: accoglit,asil,clandestin,espulsion,extracomunitar,immigr,migrant,profug,sbarc,soggiorn
 
# 10:
  . selected features: 210
  . top features: aeroport,anas,autostrad,autostradal,ferrov,ferroviar,infrastruttur,strad,stradal,trasport
 
# 12:
  . selected features: 225
  . top features: criminal,penitenziar,poliz,giustiz,carc,reat,carcerar,magistr,giudiziar,deten
 
# 13:
  . selected features: 84
  . top features: card,famigl,povert,invalid,disabil,famil,nid,redd,autosufficit,ise
 
# 14:
  . selected features: 79
  . top features: abusiv,allogg,cond,ediliz,inquilin,locazion,sfratt,immobil,residenzial,affitt
 
# 15:
  . selected features: 108
  . top features: banc,bancar,impres,gioc,cred,calc,piccol,consob,risparm,pasc
 
# 16:
  . selected features: 148
  . top features: afghanistan,difes,milit,militar,trupp,armat,statunitens,caserm,isaf,kosov
 
# 17:
  . selected features: 108
  . top features: frequenz,rai,televis,tv,radiotelevis,telefon,emittent,pubblicitar,beauty,telecomun
 
# 18:
  . selected features: 96
  . top features: antidumping,calzatur,daz,doganal,semilavor,internazionalizz,calzaturier,cines,cin,dogan
 
# 19:
  . selected features: 180
  . top features: turc,palestines,israel,diplomat,internazional,europe,russ,union,libic,curd
 
# 20:
  . selected features: 92
  . top features: elettoral,elezion,amministr,federal,postal,enti,incar,consigl,costituzional,pubblic
 
# 21:
  . selected features: 140
  . top features: archeolog,cultural,idric,muse,parc,pompe,restaur,patrimon,artist,museal
 
# 23:
  . selected features: 65
  . top features: artist,conservator,cultural,music,musical,spettacol,teatr,edizion,liric,danz
 
In [11]:
# Rebuild the vectorizer restricted to the chi-squared-selected vocabulary
# and re-vectorize the corpus. With `vocabulary` fixed, min_df has no
# further effect on which terms are kept.
tfidf = TfidfVectorizer(strip_accents=strip_accents,
                        lowercase=False,
                        preprocessor=preprocessor,
                        tokenizer=tokenizer_porter,
                        stop_words=stop,
                        min_df = 4,
                        vocabulary=X_names
                       )

final_features = tfidf.fit_transform(documents)
In [12]:
# Sparsity check of the reduced matrix, same unseeded random column sample
# as before — results vary between runs.
sns.heatmap(final_features.todense()[:,np.random.randint(0,final_features.shape[1],100)]==0, vmin=0, vmax=1, cbar=False).set_title('Sparse Matrix Sample')
Out[12]:
Text(0.5, 1.0, 'Sparse Matrix Sample')
In [13]:
# Shape of the reduced document-term matrix (documents x selected terms).
# The original called .toarray() first, densifying the entire sparse matrix
# just to read its shape; .shape is available on the sparse matrix itself.
print(final_features.shape)
(5248, 2612)
In [14]:
# Classification pipeline: TF-IDF vectorization followed by a linear SVM.
# C comes from the config cell; probability=True enables predict_proba
# (needed for ROC curves and LIME) at the cost of an extra internal
# cross-validation during fit.
pipe_lr = make_pipeline(
                        tfidf,
                        SVC(kernel="linear", C=C,probability=True)
                        )
In [15]:
# Stratified k-fold cross-validation of the TF-IDF + SVM pipeline.
# Per fold: fit, report accuracy/AUC/classification report, plot the
# confusion matrix, ROC and precision-recall curves, and show a LIME
# explanation for the first test document. Overall CV stats printed at the end.
X=np.array(documents)
y=np.array(classes)
kfold = StratifiedKFold(n_splits=nfold,shuffle=True,random_state=1).split(X, y)

accuracys = []
scores= []
target_names=list(map(str,np.unique(classes)))

for k, (train, test) in enumerate(kfold):
    pipe_lr.fit(X[train], y[train])
    y_pred=pipe_lr.predict(X[test])
    y_prob=pipe_lr.predict_proba(X[test])
    # One-hot view of the test labels for the per-class ROC/PR curves.
    # NOTE(review): get_dummies column order matches target_names only if
    # every class appears in the test fold — confirm for rare classes.
    y_test_array = pd.get_dummies(y[test], drop_first=False).values
    
    ## compute accuracy
    accuracy = pipe_lr.score(X[test], y[test])
    accuracys.append(accuracy)
    
    ## compute precision, recall, f-score (weighted by class support)
    auc = metrics.roc_auc_score(y[test], y_prob, multi_class="ovr")
    score=precision_recall_fscore_support(y_true=y[test], y_pred=y_pred, average="weighted")
    scores.append(score[0:3])
    
    print('--------------- Fold: %2d ---------------------'% (k+1))
    print()
    print("Accuracy:",  round(accuracy,2))
    print("Auc:", round(auc,2))
    print("Detail:")
    print(metrics.classification_report(y[test], y_pred))
    
    ## Plot confusion matrix
    conf_mat = confusion_matrix(y[test], y_pred)
    fig, ax = plt.subplots(figsize=(10,10))
    sns.heatmap(conf_mat, annot=True, fmt='d', ax=ax, cbar=False,cmap=plt.cm.Blues)
    ax.set(xlabel="Predicted", ylabel="Actual", xticklabels=target_names, 
           yticklabels=target_names, title="Confusion matrix")
    plt.yticks(rotation=0)
    
    print()
    
    ## Plot one-vs-rest ROC curve per class
    fig, ax = plt.subplots(figsize=(10,10))
    for i in range(len(target_names)):
        fpr, tpr, thresholds = metrics.roc_curve(y_test_array[:,i],  
                               y_prob[:,i])
        ax.plot(fpr, tpr, lw=3, 
                  label='{0} (area={1:0.2f})'.format(target_names[i], 
                                  metrics.auc(fpr, tpr))
                   )
    # Diagonal = random-classifier baseline.
    ax.plot([0,1], [0,1], color='navy', lw=3, linestyle='--')
    ax.set(xlim=[-0.05,1.0], ylim=[0.0,1.05], 
              xlabel='False Positive Rate', 
              ylabel="True Positive Rate (Recall)", 
              title="Receiver operating characteristic")
    ax.legend(loc="lower right")
    ax.grid(True)
    
    ## Plot precision-recall curve per class
    fig, ax = plt.subplots(figsize=(10,10))
    for i in range(len(target_names)):
        precision, recall, thresholds = metrics.precision_recall_curve(
                     y_test_array[:,i], y_prob[:,i])
        ax.plot(recall, precision, lw=3, 
                   label='{0} (area={1:0.2f})'.format(target_names[i], 
                                      metrics.auc(recall, precision))
                  )
    ax.set(xlim=[0.0,1.05], ylim=[0.0,1.05], xlabel='Recall', 
              ylabel="Precision", title="Precision-Recall curve")
    ax.legend(loc="best")
    ax.grid(True)
    plt.show()
    
    ## pick the first test observation of this fold for explanation
    i = 0
    txt_instance = X[test][i]
    ## show true value vs predicted value and top predicted probability
    print("True:", y[test][i], "--> Pred:", y_pred[i], "| Prob:", round(np.max(y_prob[i]),2))
    ## LIME explanation of the pipeline's prediction for this document
    explainer = lime_text.LimeTextExplainer(class_names=target_names)
    explained = explainer.explain_instance(txt_instance, 
                 pipe_lr.predict_proba, num_features=6,top_labels=2)
    explained.show_in_notebook(text=txt_instance, predict_proba=False)

    
# Aggregate per-fold (precision, recall, f1) triples into an array:
# column 0 = precision, 1 = recall, 2 = f1.
arr = np.array(scores)

print("Overall results of the cross-validation procedure")
print()

print('\nCV accuracy: %.1f +/- %.1f' % (np.mean(accuracys)*100, np.std(accuracys)*100))
print('\nCV precision: %.1f +/- %.1f' % (np.mean(arr[:,0])*100, np.std(arr[:,0])*100))
print('\nCV recall: %.1f +/- %.1f' % (np.mean(arr[:,1])*100, np.std(arr[:,1])*100))
print('\nCV f1: %.1f +/- %.1f' % (np.mean(arr[:,2])*100, np.std(arr[:,2])*100))
c:\users\berto\appdata\local\programs\python\python38\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
--------------- Fold:  1 ---------------------

Accuracy: 0.74
Auc: 0.97
Detail:
              precision    recall  f1-score   support

           1       0.48      0.67      0.56        42
           2       0.65      0.73      0.69        15
           3       0.89      0.80      0.84        40
           4       1.00      0.86      0.92        21
           5       0.75      0.84      0.79        32
           6       0.85      0.81      0.83        27
           7       0.81      0.92      0.86        24
           8       0.75      0.82      0.78        11
           9       0.77      0.85      0.81        20
          10       0.86      0.86      0.86        50
          12       0.72      0.79      0.76        73
          13       0.67      0.36      0.47        11
          14       1.00      0.71      0.83         7
          15       0.68      0.65      0.67        40
          16       0.75      0.69      0.72        13
          17       0.80      0.40      0.53        10
          18       1.00      0.50      0.67         2
          19       0.84      0.67      0.74        24
          20       0.61      0.58      0.60        48
          21       0.73      0.67      0.70        12
          23       0.00      0.00      0.00         3

    accuracy                           0.74       525
   macro avg       0.74      0.68      0.70       525
weighted avg       0.75      0.74      0.74       525


True: 1 --> Pred: 1 | Prob: 0.84
--------------- Fold:  2 ---------------------

Accuracy: 0.73
Auc: 0.97
Detail:
              precision    recall  f1-score   support

           1       0.63      0.52      0.57        42
           2       0.46      0.40      0.43        15
           3       0.88      0.93      0.90        40
           4       0.80      0.76      0.78        21
           5       0.56      0.62      0.59        32
           6       0.85      0.85      0.85        27
           7       0.88      0.92      0.90        24
           8       0.71      0.91      0.80        11
           9       0.67      0.70      0.68        20
          10       0.86      0.88      0.87        50
          12       0.71      0.83      0.77        72
          13       0.75      0.82      0.78        11
          14       0.67      0.86      0.75         7
          15       0.61      0.47      0.54        40
          16       0.79      0.85      0.81        13
          17       0.86      0.60      0.71        10
          18       1.00      0.33      0.50         3
          19       0.68      0.54      0.60        24
          20       0.64      0.67      0.65        48
          21       0.75      0.75      0.75        12
          23       0.50      0.33      0.40         3

    accuracy                           0.73       525
   macro avg       0.73      0.69      0.70       525
weighted avg       0.72      0.73      0.72       525


True: 15 --> Pred: 20 | Prob: 0.22
--------------- Fold:  3 ---------------------

Accuracy: 0.69
Auc: 0.97
Detail:
              precision    recall  f1-score   support

           1       0.54      0.67      0.60        42
           2       0.67      0.53      0.59        15
           3       0.80      0.88      0.83        40
           4       0.84      0.80      0.82        20
           5       0.73      0.73      0.73        33
           6       0.86      0.89      0.88        28
           7       0.72      0.54      0.62        24
           8       0.69      0.82      0.75        11
           9       0.74      0.70      0.72        20
          10       0.82      0.74      0.78        50
          12       0.67      0.79      0.73        72
          13       0.70      0.64      0.67        11
          14       0.67      0.57      0.62         7
          15       0.51      0.53      0.52        40
          16       0.82      0.69      0.75        13
          17       0.86      0.60      0.71        10
          18       0.00      0.00      0.00         3
          19       0.67      0.61      0.64        23
          20       0.56      0.56      0.56        48
          21       0.56      0.42      0.48        12
          23       0.50      0.33      0.40         3

    accuracy                           0.69       525
   macro avg       0.66      0.62      0.64       525
weighted avg       0.69      0.69      0.68       525


True: 1 --> Pred: 1 | Prob: 0.39
--------------- Fold:  4 ---------------------

Accuracy: 0.69
Auc: 0.97
Detail:
              precision    recall  f1-score   support

           1       0.62      0.57      0.59        42
           2       0.62      0.36      0.45        14
           3       0.82      0.80      0.81        40
           4       0.71      0.75      0.73        20
           5       0.72      0.85      0.78        33
           6       0.83      0.89      0.86        28
           7       0.71      0.83      0.77        24
           8       0.90      0.82      0.86        11
           9       0.74      0.70      0.72        20
          10       0.91      0.86      0.89        50
          12       0.63      0.76      0.69        72
          13       0.46      0.55      0.50        11
          14       0.33      0.29      0.31         7
          15       0.58      0.53      0.55        40
          16       0.69      0.69      0.69        13
          17       0.62      0.50      0.56        10
          18       0.60      1.00      0.75         3
          19       0.58      0.65      0.61        23
          20       0.61      0.47      0.53        49
          21       0.45      0.42      0.43        12
          23       0.50      0.33      0.40         3

    accuracy                           0.69       525
   macro avg       0.65      0.65      0.64       525
weighted avg       0.68      0.69      0.68       525


True: 1 --> Pred: 12 | Prob: 0.39
--------------- Fold:  5 ---------------------

Accuracy: 0.69
Auc: 0.96
Detail:
              precision    recall  f1-score   support

           1       0.58      0.62      0.60        42
           2       0.33      0.50      0.40        14
           3       0.94      0.75      0.83        40
           4       0.72      0.65      0.68        20
           5       0.69      0.67      0.68        33
           6       0.77      0.86      0.81        28
           7       0.72      0.78      0.75        23
           8       0.64      0.82      0.72        11
           9       0.78      0.70      0.74        20
          10       0.90      0.90      0.90        50
          12       0.70      0.75      0.72        72
          13       0.45      0.45      0.45        11
          14       1.00      0.71      0.83         7
          15       0.54      0.65      0.59        40
          16       0.80      0.57      0.67        14
          17       0.57      0.40      0.47        10
          18       0.00      0.00      0.00         3
          19       0.68      0.65      0.67        23
          20       0.70      0.67      0.69        49
          21       0.62      0.42      0.50        12
          23       0.33      0.33      0.33         3

    accuracy                           0.69       525
   macro avg       0.64      0.61      0.62       525
weighted avg       0.70      0.69      0.69       525


True: 1 --> Pred: 1 | Prob: 0.61
c:\users\berto\appdata\local\programs\python\python38\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
--------------- Fold:  6 ---------------------

Accuracy: 0.67
Auc: 0.96
Detail:
              precision    recall  f1-score   support

           1       0.53      0.64      0.58        42
           2       0.36      0.36      0.36        14
           3       0.85      0.85      0.85        40
           4       0.95      0.90      0.92        20
           5       0.50      0.61      0.55        33
           6       0.75      0.78      0.76        27
           7       0.78      0.75      0.77        24
           8       0.75      0.82      0.78        11
           9       0.67      0.80      0.73        20
          10       0.86      0.86      0.86        51
          12       0.73      0.72      0.73        72
          13       0.40      0.36      0.38        11
          14       0.83      0.71      0.77         7
          15       0.55      0.55      0.55        40
          16       0.70      0.50      0.58        14
          17       0.71      0.50      0.59        10
          18       0.00      0.00      0.00         3
          19       0.62      0.65      0.64        23
          20       0.53      0.47      0.50        49
          21       0.75      0.55      0.63        11
          23       0.75      1.00      0.86         3

    accuracy                           0.67       525
   macro avg       0.65      0.64      0.64       525
weighted avg       0.67      0.67      0.67       525


True: 1 --> Pred: 1 | Prob: 0.87
--------------- Fold:  7 ---------------------

Accuracy: 0.7
Auc: 0.97
Detail:
              precision    recall  f1-score   support

           1       0.56      0.64      0.60        42
           2       0.50      0.50      0.50        14
           3       0.92      0.83      0.87        41
           4       0.82      0.90      0.86        20
           5       0.73      0.69      0.71        32
           6       0.69      0.81      0.75        27
           7       0.75      0.75      0.75        24
           8       0.91      0.91      0.91        11
           9       1.00      0.81      0.89        21
          10       0.78      0.82      0.80        51
          12       0.70      0.74      0.72        72
          13       0.53      0.73      0.62        11
          14       0.57      0.57      0.57         7
          15       0.47      0.46      0.47        39
          16       0.83      0.77      0.80        13
          17       0.73      0.73      0.73        11
          18       0.50      0.67      0.57         3
          19       0.76      0.70      0.73        23
          20       0.55      0.47      0.51        49
          21       0.67      0.55      0.60        11
          23       0.00      0.00      0.00         3

    accuracy                           0.70       525
   macro avg       0.67      0.67      0.66       525
weighted avg       0.70      0.70      0.69       525


True: 12 --> Pred: 12 | Prob: 0.18
--------------- Fold:  8 ---------------------

Accuracy: 0.71
Auc: 0.97
Detail:
              precision    recall  f1-score   support

           1       0.58      0.62      0.60        42
           2       1.00      0.50      0.67        14
           3       0.90      0.90      0.90        41
           4       0.79      0.71      0.75        21
           5       0.67      0.69      0.68        32
           6       0.88      0.78      0.82        27
           7       0.76      0.79      0.78        24
           8       0.75      0.60      0.67        10
           9       0.86      0.86      0.86        21
          10       0.81      0.86      0.84        51
          12       0.71      0.81      0.75        72
          13       0.70      0.70      0.70        10
          14       1.00      0.38      0.55         8
          15       0.58      0.55      0.56        40
          16       0.83      0.77      0.80        13
          17       0.75      0.60      0.67        10
          18       0.00      0.00      0.00         3
          19       0.60      0.65      0.63        23
          20       0.55      0.58      0.57        48
          21       0.50      0.73      0.59        11
          23       1.00      0.50      0.67         4

    accuracy                           0.71       525
   macro avg       0.72      0.65      0.67       525
weighted avg       0.72      0.71      0.71       525


True: 1 --> Pred: 1 | Prob: 0.63
--------------- Fold:  9 ---------------------

Accuracy: 0.73
Auc: 0.97
Detail:
              precision    recall  f1-score   support

           1       0.52      0.71      0.60        42
           2       0.64      0.50      0.56        14
           3       0.84      0.90      0.87        41
           4       0.85      0.81      0.83        21
           5       0.81      0.66      0.72        32
           6       0.96      0.85      0.90        27
           7       0.74      0.83      0.78        24
           8       0.80      0.80      0.80        10
           9       0.86      0.86      0.86        21
          10       0.85      0.80      0.83        51
          12       0.73      0.90      0.81        72
          13       0.44      0.40      0.42        10
          14       0.88      0.88      0.88         8
          15       0.72      0.53      0.61        40
          16       0.75      0.69      0.72        13
          17       0.75      0.60      0.67        10
          18       0.50      0.33      0.40         3
          19       0.59      0.70      0.64        23
          20       0.64      0.56      0.60        48
          21       0.71      0.45      0.56        11
          23       0.50      0.33      0.40         3

    accuracy                           0.73       524
   macro avg       0.72      0.67      0.69       524
weighted avg       0.74      0.73      0.73       524


True: 1 --> Pred: 19 | Prob: 0.38
--------------- Fold: 10 ---------------------

Accuracy: 0.73
Auc: 0.96
Detail:
              precision    recall  f1-score   support

           1       0.58      0.76      0.66        42
           2       0.67      0.57      0.62        14
           3       0.91      0.80      0.85        40
           4       0.89      0.81      0.85        21
           5       0.84      0.81      0.83        32
           6       0.81      0.81      0.81        27
           7       0.71      0.71      0.71        24
           8       0.75      0.60      0.67        10
           9       0.75      0.90      0.82        20
          10       0.88      0.86      0.87        51
          12       0.75      0.81      0.78        72
          13       0.56      0.50      0.53        10
          14       0.75      0.75      0.75         8
          15       0.53      0.47      0.50        40
          16       0.80      0.62      0.70        13
          17       0.57      0.80      0.67        10
          18       0.00      0.00      0.00         3
          19       0.71      0.71      0.71        24
          20       0.62      0.65      0.63        48
          21       0.62      0.42      0.50        12
          23       1.00      0.33      0.50         3

    accuracy                           0.73       524
   macro avg       0.70      0.65      0.66       524
weighted avg       0.73      0.73      0.72       524


True: 1 --> Pred: 1 | Prob: 0.81
Overall results of the cross-validation procedure


CV accuracy: 70.7 +/- 2.2

CV precision: 71.1 +/- 2.4

CV recall: 70.7 +/- 2.2

CV f1: 70.5 +/- 2.2
In [ ]: